# ---- Setup ----
# Packages providing GRanges()/IRanges()/mid() and ScanBamParam()/scanBam(),
# which the rest of this script calls.
library(GenomicRanges)
library(Rsamtools)

# Gene annotation table. The columns checked below are the ones this script
# reads later on.
all_genes <- read.csv("~/data/feature_file/gene_master.csv")
stopifnot(all(c("chr", "strand", "tss", "length") %in% names(all_genes)))

# na.omit() drops every row with a missing value in ANY column; the intent is
# to keep only genes that have a TSS coordinate.
all_genes <- na.omit(all_genes)

# Keep genes longer than 700 (units as stored in the `length` column).
gene_700 <- subset(all_genes, length > 700)

# BAM inputs are located as /data/bam_bai/<file_name><file_extension>.bam,
# with the index at the same path plus ".bai".
file_name <- c("cac_pulse")
file_extension <- c("_sampled")

# Inclusive fragment-length bounds (taken from the BAM `isize` field) used to
# select the fragments that are analysed; the final figure labels these as
# subnucleosomal fragments.
length_min <- 20
length_max <- 80

# Accumulator for the per-gene phasing estimates (one dominant lag per gene
# and file that had any qualifying fragment).
acf_lag <- numeric(0)

# ---- Per-gene phasing estimate ----
# For each gene, count fragment midpoints at every base of a window anchored
# at the TSS and extending into the gene body, then record the lag with the
# highest autocorrelation of that midpoint-count profile.

# Window extent (bp) from the TSS into the gene body; the window includes both
# end points, so it spans window_width + 1 positions.
window_width <- 700
# Largest lag evaluated by acf(), and the smallest lag eligible to be reported.
acf_lag_max <- 200
acf_lag_min <- 20

# Lag, among lags >= `min_lag`, with the highest autocorrelation of `counts`.
# Returns a zero-length vector when no maximum exists (e.g. all-NaN
# autocorrelation for a constant series).
dominant_acf_lag <- function(counts, lag_max = 200, min_lag = 20) {
  fit <- acf(counts, lag.max = lag_max, plot = FALSE)
  # acf() reports lag 0 first, so skipping the first `min_lag` entries removes
  # lags 0 .. (min_lag - 1).
  keep <- seq_len(dim(fit$acf)[1]) > min_lag
  cors <- fit$acf[keep, , 1]
  lags <- fit$lag[keep, , 1]
  lags[which.max(cors)]
}

# Reads the records overlapping `region` from one BAM file and returns the
# midpoints (width-1 ranges) of the fragments whose length lies in
# [min_len, max_len].
fragment_midpoints <- function(bam, bai, region, min_len, max_len) {
  param <- ScanBamParam(what = c("rname", "strand", "pos", "isize"),
                        which = region)
  reads <- scanBam(file = bam, index = bai, param = param)[[1]]

  # Filter on isize BEFORE building ranges: IRanges() rejects negative or NA
  # widths, which occur for reverse mates / unpaired records in paired-end
  # BAM files.
  ok <- !is.na(reads$isize) & reads$isize > 0 &
    reads$isize >= min_len & reads$isize <= max_len

  frags <- GRanges(seqnames = reads$rname[ok],
                   ranges = IRanges(start = reads$pos[ok],
                                    width = reads$isize[ok]))
  ranges(frags) <- IRanges(start = mid(ranges(frags)), width = 1)
  frags
}

# File paths do not depend on the gene, so build them once.
bam_paths <- paste0("/data/bam_bai/", file_name, file_extension, ".bam")
bai_paths <- paste0(bam_paths, ".bai")

n_genes <- nrow(gene_700)
gene_lags <- vector("list", n_genes)

for (g in seq_len(n_genes)) {
  chr <- as.character(gene_700$chr[g])
  tss <- gene_700$tss[g]

  # The window always extends from the TSS into the gene body: towards higher
  # coordinates on the + strand, towards lower coordinates otherwise.
  if (gene_700$strand[g] == "+") {
    new_start <- tss
    new_end <- tss + window_width
  } else {
    new_start <- tss - window_width
    new_end <- tss
  }

  # Region used to query the BAM file, and one width-1 range per base of the
  # window used to count midpoints.
  region.gr <- GRanges(seqnames = chr,
                       ranges = IRanges(start = new_start, end = new_end))
  positions.gr <- GRanges(seqnames = chr,
                          ranges = IRanges(start = seq(new_start, new_end, by = 1),
                                           width = 1))

  lags_this_gene <- numeric(0)
  for (f in seq_along(bam_paths)) {
    midpoints.gr <- fragment_midpoints(bam_paths[f], bai_paths[f], region.gr,
                                       length_min, length_max)
    midpoint_counts <- countOverlaps(positions.gr, midpoints.gr)

    # No qualifying fragment in this window: nothing to autocorrelate.
    if (sum(midpoint_counts) == 0) {
      next
    }

    lags_this_gene <- c(lags_this_gene,
                        dominant_acf_lag(midpoint_counts,
                                         lag_max = acf_lag_max,
                                         min_lag = acf_lag_min))
  } # end file for
  gene_lags[[g]] <- lags_this_gene

  if (g %% 100 == 0) {
    cat(paste("Gene # ", g, "\n"))
  }
} # end gene for

# Append in gene order (then file order), as a single vector.
acf_lag <- c(acf_lag, unlist(gene_lags))

# ---- Plotting ----
# Density of the per-gene dominant autocorrelation lags, written to a PNG.

# Fail before creating the output file if there is nothing to plot.
if (length(acf_lag) == 0) {
  stop("No phasing estimates were collected; nothing to plot.", call. = FALSE)
}

png(filename = "~/data/figure/supple_smallFrag_phasing.png",
    width = 3.5, height = 2.5, units = "in", res = 300,
    bg = "white", type = "cairo-png")

# Close the device even if density()/plot() fails, so no device is left open.
invisible(tryCatch(
  {
    par(mar = c(4, 4, 4, 4), cex = 0.8, cex.main = 0.7)
    plot(density(acf_lag, bw = 5), xlim = c(20, 220), lwd = 3,
         xlab = "Subnucleosomal fragment phasing (bp) ", main = "")
  },
  finally = dev.off()
))

